Chapter 11
Lines for time series, with time on the x-axis
# data from: April 18, 2017
library(tidyverse)
# Load the mortgage rates and reshape them from wide to long form: every
# column except DATE becomes a (TYPE, RATE) pair. TYPE is then reordered with
# fct_reorder2() so that the legend is listed in the correct order, i.e. the
# order in which the lines end at the right-hand side of the plot.
df <- read_csv("mortgage.csv") %>%
  gather(key = TYPE, value = RATE, -DATE) %>%
  mutate(TYPE = forcats::fct_reorder2(TYPE, DATE, RATE))

# Line chart with one colored line per TYPE; the legend title is removed.
g <- ggplot(data = df, mapping = aes(x = DATE, y = RATE, color = TYPE)) +
  geom_line() +
  labs(title = "U.S. Mortgage Rates", x = "", y = "percent") +
  theme_grey(base_size = 16) +
  theme(legend.title = element_blank())
# Display the line chart stored in `g`.
g

# The same data drawn as columns filled by TYPE; the subtitle flags this
# version as not recommended.
ggplot(df, aes(DATE, RATE, fill = TYPE)) +
  geom_col() +
  ggtitle("U.S. Mortgage Rates", subtitle = "Not recommended!")

# lubridate provides the date helpers used below (year(), wday(), day()).
library(lubridate)
# Keep only the observations dated in 2010 and redraw the line chart.
df2010 <- df %>% filter(year(DATE) == 2010)
ggplot(df2010, aes(DATE, RATE, color = TYPE)) +
  geom_line() +
  ggtitle("U.S. Mortgage Rates")

# Box-office data used by the "Manchester by the Sea" plots that follow;
# they read its Date and Gross columns.
dfman <- read_csv("ManchesterByTheSea.csv")
# Line chart of Gross against Date, stored in `g` so it can be printed or
# extended with further layers later on.
g <- ggplot(data = dfman, mapping = aes(x = Date, y = Gross)) +
  geom_line() +
  labs(
    title = "Manchester by the Sea",
    subtitle = "Daily Gross (US$), United States",
    x = "2016-2017"
  )
# Display the line chart stored in `g`.
g

# Rebuild `g` as a scatterplot, then overlay a grey connecting line and a
# loess smoother (drawn without its confidence band).
g <- ggplot(dfman, aes(Date, Gross)) + geom_point()
g +
  geom_line(color = "grey50") +
  geom_smooth(method = "loess", se = FALSE, lwd = 1.5) +
  ggtitle("Loess smoother")

# Reset `g` to a plain line chart; the following plots add layers to it.
g <- ggplot(dfman, aes(Date, Gross)) +
  geom_line() +
  ggtitle("Manchester by the Sea",
          "Daily Gross, United States")
# Mark the Saturdays on the line chart. wday() returns 1-7; 7 is Saturday
# when the week starts on Sunday.
# NOTE(review): that is lubridate's default week start -- confirm the
# `lubridate.week.start` option is not overridden in this session.
saturday <- dfman %>% filter(wday(Date) == 7)
g + geom_point(data = saturday, aes(Date, Gross),
               color = "deeppink")

# One panel per day of the week.
ggplot(dfman, aes(Date, Gross)) +
  geom_line(color = "grey30") +
  geom_point(size = 1) +
  facet_grid(. ~ wday(Date, label = TRUE))

# The same faceted plot with a smoother (no confidence band) in each panel.
ggplot(dfman, aes(Date, Gross)) +
  geom_line(color = "grey30") +
  geom_point(size = 1) +
  facet_grid(. ~ wday(Date, label = TRUE)) +
  geom_smooth(se = FALSE)

# Subset to 2016-12-20 through 2017-01-03 (both ends inclusive). Both bounds
# are explicit Date objects so the comparison does not rely on implicit
# character-to-Date coercion.
christmas <- dfman %>%
  filter(Date >= as.Date("2016-12-20") &
           Date <= as.Date("2017-01-03"))
# First version: each observation is labelled with its weekday abbreviation
# and the x-axis has one tick per day, formatted as month / day-of-month.
ggplot(christmas, aes(Date, Gross)) +
  geom_label(aes(label = wday(Date, label = TRUE))) +
  geom_line(color = "cornflowerblue") +
  scale_x_date(date_labels = "%b\n%d",
               date_breaks = "1 day")

# Second version: Gross is shown in millions, the x-axis ticks carry the
# weekday ("%a"), and each point is labelled with its day of the month,
# placed 0.06 (million) above the point so the label does not cover it.
ggplot(christmas, aes(Date, Gross / 1000000)) +
  geom_line(color = "cornflowerblue", lwd = 1.1) +
  geom_point(color = "cornflowerblue", size = 2) +
  geom_label(data = christmas,
             aes(x = Date, y = Gross / 1000000 + .06,
                 label = day(Date))) +
  scale_x_date(date_labels = "%a",
               date_breaks = "1 day") +
  ggtitle("Manchester by the Sea",
          "Christmas Week Box Office Gross") +
  labs(x = "Dec 2016 - Jan 2017",
       y = "Daily Gross (in millions $US)") +
  theme_grey(14)

# annotate Christmas Week
# Date range to highlight on the chart stored in `g`.
start <- as.Date("2016-12-24")
end <- as.Date("2017-01-02")

# Shade the range with a translucent rectangle spanning the full height of
# the panel (ymin/ymax = -Inf/Inf) and add a left-aligned text label two days
# to the right of the range's end.
g +
  annotate("rect", xmin = start, xmax = end,
           ymin = -Inf, ymax = Inf, fill = "green",
           alpha = .2) +
  annotate("text", x = end + 2,
           y = 1500000, label = "Dec 24 - Jan 2",
           color = "green", hjust = 0) +
  theme_classic()

# Fix the random seed so the simulated series below are reproducible.
set.seed(5702)
# Two simulated series of six time points each, drawn on different scales
# (normal with mean 10 / sd 5 and mean 100 / sd 50), reshaped to long form
# with one row per (time, key). gather() stacks all y1 rows before the y2
# rows, which is the row order shown in the table below.
tidydf <- data.frame(
  time = 1:6,
  y1 = round(rnorm(6, 10, 5), 2),
  y2 = round(rnorm(6, 100, 50), 2)
) %>%
  gather(key, value, -time)

# Both series on a common y-axis.
ggplot(tidydf, aes(time, value, col = key)) +
  geom_line()

Each value is divided by the first value for that group and multiplied by 100:
| time | key | value |
|---|---|---|
| 1 | y1 | 9.03 |
| 2 | y1 | 10.75 |
| 3 | y1 | 11.03 |
| 4 | y1 | 8.18 |
| 5 | y1 | 13.06 |
| 6 | y1 | 4.14 |
| 1 | y2 | 81.22 |
| 2 | y2 | 85.54 |
| 3 | y2 | 128.26 |
| 4 | y2 | 20.15 |
| 5 | y2 | 116.81 |
| 6 | y2 | 164.87 |
# Index each series to its first observation: within each key, divide every
# value by the first value of that key and multiply by 100 (rounded to two
# decimals). value[1] is the first row of the group, so this relies on the
# rows of each key being ordered by time.
tidydf <- tidydf %>%
  group_by(key) %>%
  mutate(index = round(100 * value / value[1], 2)) %>%
  ungroup()
tidydf
| time | key | value | index |
|---|---|---|---|
| 1 | y1 | 9.03 | 100.00 |
| 2 | y1 | 10.75 | 119.05 |
| 3 | y1 | 11.03 | 122.15 |
| 4 | y1 | 8.18 | 90.59 |
| 5 | y1 | 13.06 | 144.63 |
| 6 | y1 | 4.14 | 45.85 |
| 1 | y2 | 81.22 | 100.00 |
| 2 | y2 | 85.54 | 105.32 |
| 3 | y2 | 128.26 | 157.92 |
| 4 | y2 | 20.15 | 24.81 |
| 5 | y2 | 116.81 | 143.82 |
| 6 | y2 | 164.87 | 202.99 |
# Simulated (fake) data: one value per day of a 31-day month, following a
# parabola with its minimum at day 14 plus normal noise (sd 400).
# Note: this overwrites the earlier `df`, and `day` now also names a vector;
# calls such as day(x) still resolve to the function.
set.seed(5702)
day <- 1:31
# rnorm() is given the number of draws explicitly; passing the vector 1:31
# as `n` only works because rnorm() falls back to its length.
number <- 10 * (day - 14)^2 + 2000 + rnorm(length(day), 0, 400)
df <- data.frame(day, number)

# Line chart with points, one x-axis tick per day, y-axis fixed to 0-5000.
ggplot(df, aes(day, number)) +
  geom_line(color = "deeppink") +
  geom_point(color = "deeppink") +
  scale_x_continuous(breaks = 1:31) +
  scale_y_continuous(limits = c(0, 5000)) +
  ggtitle("Average Motor Vehicle Inspections per Day",
          subtitle = "(fake data)") +
  labs(x = "day of month", y = "number of inspections") +
  theme(plot.title = element_text(size = 16))

# The same data as a bar chart.
ggplot(df, aes(day, number)) +
  geom_col() +
  scale_x_continuous(breaks = 1:31) +
  ggtitle("Average Motor Vehicle Inspections per Day") +
  labs(x = "day of month", y = "number of inspections") +
  theme(plot.title = element_text(size = 16))

# read file
# Revenue is rescaled by 1e6 so that it can be reported in millions.
mydat <- read_csv("WA_Sales_Products_2012-14.csv") %>%
  mutate(Revenue = Revenue / 1000000)

# convert Quarter to a single numeric value Q
# (takes the 2nd character of Quarter -- presumably values look like "Q1";
# verify against the data file)
mydat$Q <- as.numeric(substr(mydat$Quarter, 2, 2))

# convert Q to end-of-quarter date
# (day 30 of the quarter's last month: months 3, 6, 9 and 12)
mydat$Date <- as.Date(paste0(mydat$Year, "-",
                             as.character(mydat$Q * 3),
                             "-30"))

# Check that dates look ok
# unique(mydat$Date)

# Total revenue per date and order method. The result is ungrouped so that
# later column edits and filters operate on a plain tibble.
Methoddata <- mydat %>%
  group_by(Date, `Order method type`) %>%
  summarize(Revenue = sum(Revenue)) %>%
  ungroup()
# Revenue over time, one colored line per order method. The x-axis is fixed
# to 2012-02-01 .. 2014-12-31 with a labelled tick every six months.
g <- ggplot(
  data = Methoddata,
  mapping = aes(x = Date, y = Revenue, color = `Order method type`)
) +
  geom_line(mapping = aes(group = `Order method type`)) +
  scale_x_date(
    limits = as.Date(c("2012-02-01", "2014-12-31")),
    date_breaks = "6 months",
    date_labels = "%b %Y"
  ) +
  labs(y = "Revenue in mil $")
# Display the chart stored in `g`.
g

# Drop every 2013 observation and redraw with geom_line(): the rows are
# simply absent, so the line connects the remaining points on either side.
No2013 <- Methoddata %>% filter(year(Date) != 2013)
g <- ggplot(No2013, aes(Date, Revenue,
                        color = `Order method type`)) +
  geom_line(aes(group = `Order method type`)) +
  scale_x_date(limits = c(as.Date("2012-02-01"), as.Date("2014-12-31")),
               date_breaks = "6 months", date_labels = "%b %Y") +
  ylab("Revenue in mil $")
# Display the chart stored in `g`.
g

# (set missing values to NA)
# The 2013 rows are kept but their Date is set to NA, and the series is
# drawn with geom_path() (which connects rows in data order) instead of
# geom_line(); the intent is for the line to break at the missing values
# rather than bridge them.
Methoddata$Date[year(Methoddata$Date) == 2013] <- NA
g <- ggplot(Methoddata, aes(Date, Revenue,
                            color = `Order method type`)) +
  geom_path(aes(group = `Order method type`)) +
  scale_x_date(limits = c(as.Date("2012-02-01"), as.Date("2014-12-31")),
               date_breaks = "6 months", date_labels = "%b %Y") +
  ylab("Revenue in mil $")
g